#--------------------------------------------------------------------------------------
#
# metabolism.R - code to compare ATG and OT data for potential signs of metabolism
#
# December 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
library(grDevices)
library(RColorBrewer)
library(stringr)
library(pca3d)
library(openxlsx)
source("utils.R")

#--------------------------------------------------------------------------------------
#
# prepare the data
#
#--------------------------------------------------------------------------------------
prep.data <- function(zcut=2,poscut=2) {
	# Build a per-chemical summary comparing three groups of ER assays
	# (ATG, OT and "Other") and write it to an Excel file.
	#
	# Arguments:
	#   zcut   - Z-score cutoff; an assay's AC50 is kept only when its
	#            Z-score is present and not below this value
	#   poscut - minimum number of active assays needed for the OT and the
	#            Other group to be counted as positive
	#
	# Side effects:
	#   - reads "../metabolism/ER superMatrix 2015-01-09.xlsx"
	#   - stores the raw table in the global variable SUPERMATRIX
	#   - writes "../metabolism/metabolism_matrix_v2.xlsx"
	#
	# Returns whatever write.xlsx() returns.

	# Sentinel AC50 used for "inactive / no data". Potencies are reported as
	# -log10(AC50/inactive), so an inactive value maps to 0.
	# NOTE(review): the division by 1e6 presumably converts uM to molar - confirm.
	inactive <- 1000000

	filename <- "../metabolism/ER superMatrix 2015-01-09.xlsx"
	supermatrix <- read.xlsx(filename)
	SUPERMATRIX <<- supermatrix
	nchem <- nrow(supermatrix)
	chems <- supermatrix[,c(1,2,3,4,6,7,8,13)]
	rownames(chems) <- chems[,"CODE"]

	a.list1 <- c("ATG_ERa_TRANS_up","ATG_ERE_CIS_up")
	a.list2 <- c("OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440")
	a.list3 <- c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa","Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist","ACEA_T47D_80hr_Positive")
	n1 <- length(a.list1)

	# Return the AC50 matrix for one assay group with every value that is
	# missing or fails the Z-score filter replaced by the inactive sentinel.
	# The filter is applied to EVERY column of the group.
	load.group <- function(assays) {
		ac50 <- as.matrix(supermatrix[,paste0(assays,"_AC50"),drop=FALSE])
		z <- as.matrix(supermatrix[,paste0(assays,"_Zscore"),drop=FALSE])
		# binarize the Z-scores: 0 = missing or below the cutoff, 1 = hit
		z[is.na(z)] <- 0
		z[z<zcut] <- 0
		z[z>0] <- 1
		ac50[is.na(ac50)] <- inactive
		# AC50 and Z-score columns are in the same assay order, so the
		# element-wise mask lines up column by column
		ac50[z==0] <- inactive
		ac50
	}

	# For each chemical (row) return c(number of active assays, most potent,
	# least potent), or c(0,0,0) when is.positive(count) is FALSE.
	score.group <- function(ac50,is.positive) {
		score.row <- function(i) {
			active <- ac50[i,]
			active <- active[active<inactive]
			count <- length(active)
			if(!is.positive(count)) return(c(0,0,0))
			c(count,-log10(min(active/inactive)),-log10(max(active/inactive)))
		}
		t(vapply(seq_len(nrow(ac50)),score.row,numeric(3)))
	}

	# ATG is positive only when all (i.e. both) ATG readouts are active; the
	# other two groups need at least poscut active assays.
	atg <- score.group(load.group(a.list1),function(count) count==n1)
	ot <- score.group(load.group(a.list2),function(count) count>=poscut)
	other <- score.group(load.group(a.list3),function(count) count>=poscut)

	atg.active <- atg[,1]!=0
	ref.active <- ot[,1]!=0 | other[,1]!=0
	ref.most <- pmax(ot[,2],other[,2])
	# NOTE(review): an inactive group contributes 0 here, so the margin-based
	# Bio-Inactivation call below can only fire when both OT and Other are
	# positive - confirm this is intended.
	ref.least <- pmin(ot[,3],other[,3])

	# Classification. When both ATG and a reference group are active the call
	# depends on a potency margin of more than one log unit; the two margin
	# conditions cannot hold at the same time, so assignment order is free.
	cls <- rep("Parent Active",nchem)
	cls[atg.active & ref.active & ref.least>atg[,2]+1] <- "Potential Bio-Inactivation"
	cls[atg.active & ref.active & atg[,3]>ref.most+1] <- "Potential Bio-Activation"
	cls[atg.active & !ref.active] <- "Potential Bio-Activation"
	cls[!atg.active & ref.active] <- "Potential Bio-Inactivation"
	cls[!atg.active & !ref.active] <- "Inactive"

	# "ring" and "iclass" are placeholder columns that are left empty here
	summat <- data.frame(
		ATG.pos=atg[,1],ATG.most.potent=atg[,2],ATG.least.potent=atg[,3],
		OT.pos=ot[,1],OT.most.potent=ot[,2],OT.least.potent=ot[,3],
		Other.pos=other[,1],Other.most.potent=other[,2],Other.least.potent=other[,3],
		class=cls,ring=rep(NA,nchem),iclass=rep(NA,nchem),
		row.names=chems[,"CODE"],stringsAsFactors=FALSE)

	combi <- cbind(chems,summat)
	filename <- "../metabolism/metabolism_matrix_v2.xlsx"
	write.xlsx(combi,file=filename, row.names=FALSE)
}
